# coding:utf8
from bs4 import BeautifulSoup
import re
class HtmlParser(object):
    """Extract links and book details from downloaded HTML pages.

    ``parser_url`` collects pagination and book links from a listing page;
    ``parser_content`` collects the title, info fields and rating from a
    single book page.  Both accept the page either as text or as raw bytes.
    """

    # Patterns are compiled once for the class instead of on every call.
    # Raw strings avoid the invalid "\?" escape-sequence warning.
    _PAGE_LINK_RE = re.compile(r"\?start=")
    _INFO_FIELD_RE = re.compile(
        r'<span class="pl">(.*?):</span> (.*?)<br/>', re.M)

    @staticmethod
    def _make_soup(html_cont):
        """Build a BeautifulSoup tree for *html_cont* (text or bytes)."""
        # from_encoding only applies to byte input; BeautifulSoup warns and
        # ignores it when the markup is already text.
        if isinstance(html_cont, bytes):
            return BeautifulSoup(html_cont, 'html.parser',
                                 from_encoding='utf-8')
        return BeautifulSoup(html_cont, 'html.parser')

    @staticmethod
    def _as_text(html_cont):
        """Return *html_cont* as text so the text regex can scan it."""
        if isinstance(html_cont, bytes):
            # Undecodable bytes are replaced rather than aborting the scan.
            return html_cont.decode('utf-8', 'replace')
        return html_cont

    def parser_url(self, pre_url, html_cont):
        """Collect pagination links and book links from a listing page.

        Args:
            pre_url: Indexable value whose first element is prepended to
                every pagination href.
                NOTE(review): presumably a sequence holding the base URL --
                confirm against the caller.
            html_cont: HTML of the page, as text or UTF-8 bytes.

        Returns:
            A tuple ``(linklist, booklist)``: ``linklist`` holds
            ``pre_url[0] + href`` for every ``<a>`` whose href contains
            ``?start=``; ``booklist`` holds the href of every
            ``<a class="title" target="_blank">``.

        Raises:
            KeyError: if a matched book anchor has no ``href`` attribute.
        """
        soup = HtmlParser._make_soup(html_cont)
        page_anchors = soup.find_all("a", href=HtmlParser._PAGE_LINK_RE)
        book_anchors = soup.find_all("a", class_="title", target="_blank")
        linklist = [pre_url[0] + anchor['href'] for anchor in page_anchors]
        booklist = [anchor['href'] for anchor in book_anchors]
        return linklist, booklist

    def parser_content(self, html_cont):
        """Collect the title, info fields and rating of a book page.

        Args:
            html_cont: HTML of the page, as text or UTF-8 bytes.

        Returns:
            A list of strings, in order: the title line, one
            ``"<label>:<value>"`` line per info field matched in the raw
            markup, the rating line, and a trailing empty string.

        Raises:
            IndexError: if the page has no ``v:itemreviewed`` span or no
                ``v:average`` strong element.
        """
        soup = HtmlParser._make_soup(html_cont)
        scores = soup.find_all("strong", property="v:average")
        titles = soup.find_all("span", property="v:itemreviewed")

        book_info = ["标题:" + titles[0].get_text()]
        # Info fields are read from the raw markup rather than the parsed
        # tree; only "label:</span> value<br/>" pairs on one line match.
        fields = HtmlParser._INFO_FIELD_RE.findall(
            HtmlParser._as_text(html_cont))
        book_info.extend(label + ":" + value for label, value in fields)
        book_info.append("豆瓣评分:" + scores[0].get_text())
        # Trailing empty entry is part of the returned list's shape.
        book_info.append("")

        return book_info
